package org.luosl.webmagicx.urlcreator

import org.luosl.webmagicx.conf.{XmlProps, SpiderConf}
import org.luosl.webmagicx.conf.PropType._
import org.luosl.webmagicx.conf.MatcherConverters._
import us.codecraft.webmagic.{Request, Task}

import scala.util.matching.Regex

/**
  * Range-based URL creator.
  *
  * Expands every numeric range placeholder of the form `[start-end]` found in the
  * URL, and in the configured range parameters, into one concrete value per step.
  *
  * @param sc    spider configuration, forwarded to [[AbstractUrlCreator]]
  * @param task  task, forwarded to [[AbstractUrlCreator]]
  * @param props XML properties of this creator; `step` and the `params` / `rangeParam`
  *              entries are read from it
  */
class RangeUrlCreator(sc:SpiderConf, task:Task, props:XmlProps) extends AbstractUrlCreator(sc, task, props){

  /** Step used when expanding ranges inside the URL; falls back to 1 when `step` is not configured. */
  private val step:Int = props.valueOrDefault("step")(intType)(1)

  /** Regular expression that locates a numeric range such as `[1-10]`. */
  private val regex:Regex = "\\[\\d+\\-\\d+\\]".r

  /** Regular expression that extracts the start value (group 2) and end value (group 4) of a range. */
  private val matchRegex:Regex = "(\\[)(\\d+)(\\-)(\\d+)(\\])".r

  /**
    * Range-based parameters.
    * key   parameter name
    * step  parameter step (falls back to 1 when not configured)
    * value raw parameter value, expected to be a range such as `[1-10]`
    */
  private val rangeParams:Map[String, (Int, String)] = props.props("params", "rangeParam")
    .map{ prop=>
      val key:String = prop.value("key")(strType)
      val value =  prop.valueOrDefault("step")(intType)(1) -> prop.value("value")(strType)
      key -> value
    }.toMap

  /**
    * Builds one request per URL obtained by expanding the ranges in `url`.
    * NOTE(review): `url` and `method` are not defined in this file; presumably inherited
    * from [[AbstractUrlCreator]] - confirm.
    *
    * @return the generated requests, each with `method` applied
    */
  override def requests(): Seq[Request] = generateUrls(url).map(new Request(_).setMethod(method))

  /**
    * Builds the requests with parameters: every request from [[requests]] is combined
    * with every parameter combination from [[generateRangeParam]].
    * NOTE(review): `withParam` is not defined in this file; presumably inherited - confirm.
    *
    * @return one request per (request, parameter combination) pair
    */
  override def requestsWithParam(): Seq[Request] = {
    val baseRequests:Seq[Request] = requests()
    // The combinations do not depend on the request, so compute them once
    // instead of once per request.
    val paramCombos:Seq[Map[String, String]] = generateRangeParam()
    for{
      req <- baseRequests
      param <- paramCombos
    }yield withParam(req, param)
  }

  /**
    * Generates concrete URLs by expanding every `[start-end]` range in the given string.
    *
    * The first range is replaced by each value from `start` to `end` (inclusive, advancing
    * by `step`), and the result is expanded recursively until no range is left. A range
    * that yields no values (for example `start` greater than `end` with a positive step)
    * produces no URL at all.
    *
    * @param urlStr URL template, possibly containing ranges
    * @return the expanded URLs, or `Seq(urlStr)` when the string contains no range
    * @throws IllegalArgumentException if `step` is 0 (raised by the range construction)
    * @throws NumberFormatException    if a range bound does not fit into an `Int`
    */
  def generateUrls(urlStr:String):Seq[String] = {
    regex.findFirstIn(urlStr) match {
      case Some(matchRegex(_, start, _, end, _)) =>
        start.toInt.to(end.toInt, step).flatMap(num=> generateUrls(regex.replaceFirstIn(urlStr, num.toString)))
      // No range left. This also makes the match exhaustive: any text found by `regex`
      // is matched by `matchRegex`, so the first case handles every `Some`.
      case _ => Seq(urlStr)
    }
  }

  /**
    * Generates every combination of the range-based parameters.
    *
    * Only entries of `rangeParams` whose whole value is a `[start-end]` range are expanded;
    * other entries are ignored. Each combination is layered on top of `params`.
    * Values of a parameter are emitted in the order the range produces them, which is
    * the same order [[generateUrls]] uses.
    * NOTE(review): `params` is not defined in this file; presumably the fixed parameters
    * inherited from [[AbstractUrlCreator]] - confirm.
    *
    * @return one parameter map per combination; `Seq(params)` when there is nothing to expand
    */
  def generateRangeParam():Seq[Map[String, String]] = {
    /**
      * Recursively builds the combinations.
      * @param paramMap  parameters fixed so far
      * @param remaining range parameters still to be expanded, each with its candidate values
      * @return every completion of `paramMap` with one value per remaining parameter
      */
    def generateParam(paramMap:Map[String,String], remaining:Seq[(String, List[String])]):Seq[Map[String, String]] ={
      remaining.headOption match {
        case None => Seq(paramMap)
        case Some((key, values)) =>
          values.flatMap(item=> generateParam(paramMap + (key -> item), remaining.tail))
      }
    }
    // Expand each range parameter into its list of values. `matchRegex` is anchored when
    // used as an extractor, so only values that are entirely a range are selected.
    val expanded:Map[String, List[String]] = rangeParams.collect{
      case (key, (paramStep, matchRegex(_, start, _, end, _))) =>
        key -> start.toInt.to(end.toInt, paramStep).toList.map(_.toString)
    }
    generateParam(params, expanded.toSeq)
  }

}